* Start from an empty session and turn off output paging.
clear all
set more off

* Load ppt_droplist from the working directory.
use "$working/ppt_droplist.dta", clear

* Rows without a sales value cannot be used for anything below, so only rows that have one are kept.
keep if !missing(sales_value32)

* Remove fuzzy-matched product names (match score of at least 0.8 out of 1)
* when the row has no production quantity; rows that do have a quantity are retained.
drop if missing(production11) & score >= 0.8 & score < .

* Discard columns that are not referenced in the rest of this script.
drop _merge idm idu Uproduct_name5


* quantity_revenue_share: within each products_cocode1 x prod_date3 group, the share of
* the group's total sales value that comes from rows with a non-missing production quantity.
*   - every row of a group receives the same value
*   - groups in which no row has a production quantity receive 0
* The value is computed directly with -egen total()-, so this step needs neither the
* user-written -carryforward- command nor any particular sort order of the data.
egen sales_firm = total(sales_value32), by(products_cocode1 prod_date3)
gen sales_share = sales_value32/sales_firm
* Rows without a quantity contribute 0. total() also counts a missing term as 0, so a
* missing sales_share (group total of 0) adds nothing and an all-missing group sums to 0.
egen quantity_revenue_share = total(cond(missing(production11), 0, sales_share)), by(products_cocode1 prod_date3)
drop sales_firm sales_share

* Convert production quantities using the hand-coded unit conversion table.
* The table is keyed on unit1, matched here against the upper-cased production unit label;
* it supplies -unit- and -conversion- (presumably the standardized unit name and its multiplier).
rename production_unit13 unit1
replace unit1 = upper(unit1)
* keep(match) retains only rows whose unit is found in the table; nogenerate skips creating _merge.
* NOTE(review): rows whose production unit has no entry in the table are dropped here - confirm this is intended.
merge m:1 unit1 using "$inputs/handcoded/unit_conversion.dta", keep(match) nogenerate
drop unit1
rename (unit conversion) (product_unit product_conversion)
* Production quantity scaled by the conversion factor of its unit.
gen production = production11 * product_conversion

* Convert sales quantities using the same hand-coded unit conversion table,
* this time matched against the upper-cased sales unit label.
rename sales_unit31 unit1
replace unit1 = upper(unit1)
* keep(match) retains only rows whose unit is found in the table; nogenerate skips creating _merge.
* NOTE(review): rows whose sales unit has no entry in the table are dropped here - confirm this is intended.
merge m:1 unit1 using "$inputs/handcoded/unit_conversion.dta", keep(match) nogenerate
drop unit1
rename (unit conversion) (sales_unit sales_conversion)
* Sales quantity scaled by the conversion factor of its unit.
* NOTE(review): sales_qty has no numeric suffix, unlike the other raw variables used in this script;
* if it only resolves through variable-name abbreviation, spell out the full name - confirm.
gen sales_quantity = sales_qty * sales_conversion

* Final layout: the variables listed here are the only ones retained, in this column order.
local outvars products_cocode1 prod_date3 ann_rep_months4 product_name5 product_name_mst7 ///
    production product_unit sales_quantity sales_unit sales_value32 quantity_revenue_share
keep `outvars'
order `outvars'

* Sort the rows, remove rows that are identical on every retained variable, and save the result.
sort products_cocode1 prod_date3 product_name5
duplicates drop
save "$working/ppt_clean.dta", replace
